# -*- coding: utf-8 -*-
#	Copyright 2005 Spike^ekipS <spikeekips@gmail.com>
#
#	   This program is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2 of the License, or
#	(at your option) any later version.
#
#	This program is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#	GNU General Public License for more details.
#
#	You should have received a copy of the GNU General Public License
#	along with this program; if not, write to the Free Software
#	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import re
import sys
import urllib, urllib2
import urlparse

try:
	import base64, binascii
except:
	base64 = binascii = None

try:
	import gzip
except:
	gzip = None
try:
	import zlib
except:
	zlib = None

try:
	from cStringIO import StringIO as _StringIO
except:
	from StringIO import StringIO as _StringIO

class URLHandler (
			urllib2.HTTPDigestAuthHandler,
			urllib2.HTTPRedirectHandler,
			urllib2.HTTPDefaultErrorHandler
		) :
	"""urllib2 handler combining digest auth, redirect following and
	default error handling.

	Instead of raising on non-2xx responses, every path returns an
	``addinfourl`` carrying a ``status`` attribute so the caller can
	inspect the HTTP status code and still read the body.
	"""

	def http_error_default (self, req, fp, code, msg, headers):
		# Any 3xx other than 304 is delegated to the redirect logic;
		# everything else is wrapped and handed back with its status.
		# (// keeps integer semantics; '/' already truncated under Py2.)
		if ((code // 100) == 3) and (code != 304):
			return self.http_error_302(req, fp, code, msg, headers)
		infourl = urllib.addinfourl(fp, headers, req.get_full_url())
		infourl.status = code
		return infourl

	def http_error_302(self, req, fp, code, msg, headers):
		# Only follow the redirect when the server actually sent a
		# Location header; otherwise return the response as-is.
		if "location" in headers.dict:
			infourl = urllib2.HTTPRedirectHandler.http_error_302(
				self, req, fp, code, msg, headers)
		else:
			infourl = urllib.addinfourl(fp, headers, req.get_full_url())
		if not hasattr(infourl, "status"):
			infourl.status = code
		return infourl

	def http_error_301 (self, req, fp, code, msg, headers):
		# Same guard as http_error_302, for permanent redirects.
		if "location" in headers.dict:
			infourl = urllib2.HTTPRedirectHandler.http_error_301(
				self, req, fp, code, msg, headers)
		else:
			infourl = urllib.addinfourl(fp, headers, req.get_full_url())
		if not hasattr(infourl, "status"):
			infourl.status = code
		return infourl

	# 300/303/307 share the 302 handling above.
	http_error_300 = http_error_302
	http_error_303 = http_error_302
	http_error_307 = http_error_302

	def http_error_401(self, req, fp, code, msg, headers):
		# Check if
		# - server requires digest auth, AND
		# - we tried (unsuccessfully) with basic auth, AND
		# - we're using Python 2.3.3 or later (digest auth is irreparably broken in earlier versions)
		# If all conditions hold, parse authentication information
		# out of the Authorization header we sent the first time
		# (for the username and password) and the WWW-Authenticate
		# header the server sent back (for the realm) and retry
		# the request with the appropriate digest auth headers instead.
		# This evil genius hack has been brought to you by Aaron Swartz.
		host = urlparse.urlparse(req.get_full_url())[1]
		try:
			assert sys.version.split()[0] >= "2.3.3"
			assert base64 is not None
			# split(":", 1): RFC 2617 allows ":" inside the password,
			# so only the first colon separates user from password.
			user, passw = base64.decodestring(
				req.headers["Authorization"].split(" ")[1]).split(":", 1)
			realm = re.findall("realm='([^']*)'", headers["WWW-Authenticate"])[0]
			self.add_password(realm, host, user, passw)
			retry = self.http_error_auth_reqed("www-authenticate", host, req, headers)
			self.reset_retry_count()
			return retry
		except Exception:
			# Any failure (missing header, malformed realm, old Python)
			# falls back to returning the 401 response itself.
			return self.http_error_default(req, fp, code, msg, headers)

class HTTPPageGetter :
	"""Fetch a URL and return its body as a string.

	Supports inline ``user:password@host`` basic auth, advertises and
	decodes gzip/deflate content encodings, and routes the request
	through URLHandler for redirects and digest-auth retries.
	"""

	AGENT = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) Gecko/20051010 Firefox/1.0.7 (Ubuntu package 1.0.7)"

	ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"

	def get (self, url) :
		"""Return the (decompressed) page body for *url*.

		On a decompression error the body degrades to an empty
		string rather than raising.
		"""
		# Test for inline user:password for basic auth.
		auth = None
		if base64:
			urltype, rest = urllib.splittype(url)
			realhost, rest = urllib.splithost(rest)
			if realhost:
				user_passwd, realhost = urllib.splituser(realhost)
				if user_passwd:
					# Strip the credentials out of the URL and send
					# them as an Authorization header instead.
					url = "%s://%s%s" % (urltype, realhost, rest)
					auth = base64.encodestring(user_passwd).strip()

		request = urllib2.Request(url)
		request.add_header("User-Agent", self.AGENT)

		# Advertise only the compressions we can actually decode.
		encodings = []
		if gzip:
			encodings.append("gzip")
		if zlib:
			encodings.append("deflate")
		request.add_header("Accept-encoding", ", ".join(encodings))

		if auth:
			request.add_header("Authorization", "Basic %s" % auth)

		if self.ACCEPT_HEADER:
			request.add_header("Accept", self.ACCEPT_HEADER)

		request.add_header("A-IM", "feed") # RFC 3229 support
		# apply() was needless indirection; call build_opener directly.
		opener = urllib2.build_opener(URLHandler())
		opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent

		response = opener.open(request)
		opener.close() # JohnD

		data = response.read()
		encoding = response.headers.get("content-encoding", "")
		if gzip and encoding == "gzip":
			try:
				data = gzip.GzipFile(fileobj = _StringIO(data)).read()
			except Exception:
				data = "" # corrupt gzip stream: degrade to empty body
		elif zlib and encoding == "deflate":
			try:
				# -MAX_WBITS: raw deflate stream without zlib header
				data = zlib.decompress(data, -zlib.MAX_WBITS)
			except Exception:
				data = ""

		return data

class HTTPPageGetterNew :
	"""Fetch a URL and return ``(body, headers)``.

	Same behavior as HTTPPageGetter but also exposes the response
	headers to the caller.
	"""

	AGENT = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) Gecko/20051010 Firefox/1.0.7 (Ubuntu package 1.0.7)"

	ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"

	def get (self, url) :
		"""Return ``(body, headers)`` for *url*.

		On a decompression error the body degrades to an empty
		string rather than raising.
		"""
		# Test for inline user:password for basic auth.
		auth = None
		if base64:
			urltype, rest = urllib.splittype(url)
			realhost, rest = urllib.splithost(rest)
			if realhost:
				user_passwd, realhost = urllib.splituser(realhost)
				if user_passwd:
					# Strip the credentials out of the URL and send
					# them as an Authorization header instead.
					url = "%s://%s%s" % (urltype, realhost, rest)
					auth = base64.encodestring(user_passwd).strip()

		request = urllib2.Request(url)
		request.add_header("User-Agent", self.AGENT)

		# Advertise only the compressions we can actually decode.
		encodings = []
		if gzip:
			encodings.append("gzip")
		if zlib:
			encodings.append("deflate")
		request.add_header("Accept-encoding", ", ".join(encodings))

		if auth:
			request.add_header("Authorization", "Basic %s" % auth)

		if self.ACCEPT_HEADER:
			request.add_header("Accept", self.ACCEPT_HEADER)

		request.add_header("A-IM", "feed") # RFC 3229 support
		# apply() was needless indirection; call build_opener directly.
		opener = urllib2.build_opener(URLHandler())
		opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent

		# Debug `print dir(e)` removed: errors now simply propagate.
		response = opener.open(request)
		opener.close() # JohnD

		data = response.read()
		encoding = response.headers.get("content-encoding", "")
		if gzip and encoding == "gzip":
			try:
				data = gzip.GzipFile(fileobj = _StringIO(data)).read()
			except Exception:
				data = "" # corrupt gzip stream: degrade to empty body
		elif zlib and encoding == "deflate":
			try:
				# -MAX_WBITS: raw deflate stream without zlib header
				data = zlib.decompress(data, -zlib.MAX_WBITS)
			except Exception:
				data = ""

		return (data, response.headers, )

def get_page_new (url) :
	"""Fetch *url* and return ``(body, headers)``.

	Returns None when *url* cannot be coerced to a string. Network
	errors from the underlying opener propagate to the caller.
	"""
	try :
		url = str(url)
	except Exception :
		# Narrowed from a bare except so SystemExit/KeyboardInterrupt
		# are not swallowed.
		return None

	return HTTPPageGetterNew().get(url)

def get_page (url) :
	"""Fetch *url* and return its body as a string.

	Returns None when *url* cannot be coerced to a string. Network
	errors from the underlying opener propagate to the caller.
	"""
	try :
		url = str(url)
	except Exception :
		# Narrowed from a bare except so SystemExit/KeyboardInterrupt
		# are not swallowed.
		return None

	return HTTPPageGetter().get(url)


"""
Description
-----------


ChangeLog
---------


Usage
-----


"""

__author__ =  "Spike^ekipS <spikeekips@gmail.com>"
__version__=  "0.1"
__nonsense__ = ""

__file__ = "pageGetter.py"


